# Import dependencies.
from vllm import LLM, SamplingParams
import torch
# Sample prompts; all of them are submitted to the engine in one generate() call.
prompts = [
    "Hello, who are you",
    "The capital of France is",
    "The future of AI is",
]

# Sampling configuration applied to every prompt.
sampling_params = SamplingParams(temperature=0.85, top_p=0.95, max_tokens=512)

# Model identifier handed to vLLM; swap in any other vLLM-supported model
# (hub id or local checkpoint path) here.
MODEL_NAME = "Qwen/Qwen1.5-4B-chat"


def main():
    """Load the model, generate one completion per prompt, and print each pair."""
    # NOTE(review): trust_remote_code=True is passed through unchanged from the
    # original script -- confirm it is actually required for this checkpoint.
    llm = LLM(model=MODEL_NAME, trust_remote_code=True, dtype=torch.float16)

    # generate() returns a list of RequestOutput objects that carry the prompt,
    # the generated text, and other information.
    outputs = llm.generate(prompts, sampling_params)

    for output in outputs:
        prompt = output.prompt
        # Only the first candidate completion of each request is printed.
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


# Keep engine start-up behind the entry-point guard: vLLM may launch worker
# processes that re-import this module, and importing the file should never
# trigger a model load on its own.
if __name__ == "__main__":
    main()
